#!/bin/bash
# Download and extract the complete list of TCP and UDP ports (official and
# unofficial) from Wikipedia.
#
# Usage: make the script executable and run it with no arguments.
#
# The list is saved as ports.lst; a previous list is backed up first.

# Variables
# DLFILE: scratch file that receives the downloaded HTML page
# DLURL:  Wikipedia page to download (HTTPS, so the request is not sent in
#         plain text and no redirect from the http:// address is needed)
# OFILE:  file that receives the extracted port list
DLFILE="htmlPortList"
DLURL="https://en.wikipedia.org/wiki/List_of_TCP_and_UDP_port_numbers"
OFILE="ports.lst"

# If an output file already exists, move it aside as "<name>.bak".
# Abort when the move fails: carrying on would overwrite the existing list
# without a backup copy.
if [ -e "$OFILE" ]; then
        if ! mv -- "$OFILE" "${OFILE}.bak"; then
                printf 'error: could not back up %s\n' "$OFILE" >&2
                exit 1
        fi
fi

# Download the page into the scratch file.
# -q suppresses wget's progress output; -O names the file to write.
# Stop on failure: with no page to read, the extraction step would only
# write an empty list. wget may leave a partial file behind, so remove it.
if ! wget -q -O "$DLFILE" "$DLURL"; then
        printf 'error: could not download %s\n' "$DLURL" >&2
        rm -f -- "$DLFILE"
        exit 1
fi

# Reduce the downloaded HTML to a plain-text table, one sed stage per step:
#   1. print only the range that starts at the first line beginning with
#      "<td>" followed by "/TCP,UDP" (optionally preceded by zeros) and ends
#      at the first line containing '<p><a name="Dynamic'
#   2. drop the last two lines of that range (not part of the table)
#   3. join all remaining lines into one by removing every newline
#   4. replace each <tr ...> tag with two newlines (one record per row)
#   5. replace each <td ...> tag with a tab (one field per cell)
#   6. strip every remaining HTML tag
# NOTE: "\n" and "\t" in the replacement text of stages 4 and 5 rely on
# GNU sed. The patterns in stage 1 depend on the exact markup of the page;
# TODO confirm they still match the page as currently served.
sed -n '/^<td>0*\/TCP,UDP*/,/<p><a name=\"Dynamic/p' "$DLFILE" \
  | sed 'N;$!P;$!D;$d' \
  | sed -e :a -e '$!N;s/\n//;ta' \
  | sed 's/<tr[^>]*>/\n\n/g' \
  | sed 's/<td[^>]*>/\t/g' \
  | sed 's/<[^>]*>//g' > "$OFILE"

# The downloaded page is no longer needed once the list has been written.
rm -- "$DLFILE"
